# -*- coding: utf-8 -*-
import logging
import re
"""
附件表格word文档的格式会将表格前后加上表格开始，表格结束字样来确认表格位置，数据用“|”分隔
思路：
获取表头
获取表格数据总数
表格数据总数/表头获取行数
获取每行数据
根据表头获取相关数据

横表：
获取每行数据
根据关键词获取其所在位置的下一个位置的数据为其对应数据
横竖表判定
除了第一行，其余行是否含有关键词
竖表就用关键词所在位置的下一个数据作为标签的结果
"""


_logger = logging.getLogger(__name__)

# Every table in the converted document sits between these two marker lines.
_TABLE_BLOCK_RE = re.compile(r"表格开始---\n(.*?)\n---表格结束", re.S)
_AMOUNT_LABEL = "中标金额"
_AMOUNT_UNIT = "万元"
_CANDIDATE_MARK = "候选人"
_RANKING_MARK = "排序"


def _with_unit(key_text, value):
    """Append the 万元 unit to *value* when only *key_text* carries it."""
    if _AMOUNT_UNIT in key_text and _AMOUNT_UNIT not in value:
        return value + _AMOUNT_UNIT
    return value


def _is_key_value_table(rows, table_dict):
    """Tell whether a keyword appears as a whole cell in a short non-first row.

    Only rows after the first one that have fewer than five cells are
    inspected; a hit means keywords and their values sit side by side.
    """
    short_rows = [row for row in rows[1:] if len(row) < 5]
    return any(
        word in row
        for words in table_dict.values()
        for row in short_rows
        for word in words
    )


def _has_candidate_columns(header_row):
    """Tell whether the 2nd and 3rd cells of the first row both mention 候选人."""
    return (
        len(header_row) > 2
        and _CANDIDATE_MARK in header_row[1]
        and _CANDIDATE_MARK in header_row[2]
    )


def _value_after_keyword(row, label, word):
    """Return the cell that follows *word* in *row*, or None if unavailable.

    Rows containing a cell equal to 排序 are ignored. When *word* is the last
    cell of the row there is no value to read, so None is returned instead of
    indexing past the end of the row.
    """
    if word not in row or _RANKING_MARK in row:
        return None
    key_pos = row.index(word)
    value_pos = key_pos + 1
    if value_pos >= len(row):
        return None
    value = row[value_pos]
    if label == _AMOUNT_LABEL:
        return _with_unit(row[key_pos], value)
    return value


def _extract_key_value(rows, table_dict, candidate_layout, table_result):
    """Fill *table_result* from a table whose rows hold keyword/value pairs."""
    for label, words in table_dict.items():
        for word in words:
            if candidate_layout:
                # Candidate tables: skip the first row and let the last
                # matching row win (no early exit from the row scan).
                for row in rows[1:]:
                    value = _value_after_keyword(row, label, word)
                    if value is not None:
                        table_result[label] = value
            else:
                # Otherwise the first row that yields a value wins.
                for row in rows:
                    value = _value_after_keyword(row, label, word)
                    if value is not None:
                        table_result[label] = value
                        break
            if label in table_result:
                break


def _extract_columnar(rows, table_dict, table_result):
    """Fill *table_result* from a table with a header row and a data row.

    Only the second row is read as data. Tables without a second row are
    skipped, and header cells without a matching data cell are ignored.
    """
    if len(rows) < 2:
        return
    header_row, data_row = rows[0], rows[1]
    for label, words in table_dict.items():
        # Header cells containing these terms must not be used for the label.
        if label == _AMOUNT_LABEL:
            excluded_terms = ("控制", "预算")
        else:
            excluded_terms = (_RANKING_MARK,)
        for word in words:
            # zip() stops at the shorter row, so a short data row cannot
            # cause an out-of-range access.
            for header, value in zip(header_row, data_row):
                if word not in header:
                    continue
                if any(term in header for term in excluded_terms):
                    continue
                table_result[label] = _with_unit(header, value)
                break
            if label in table_result:
                break


def get_doc_table(input_doc, table_dict):
    """Extract labelled values from the marked tables inside a document text.

    Tables are the text between a "表格开始---" line and a "---表格结束" line;
    rows are separated by newlines and cells by "|". Tables are tried in
    document order and the first table that yields at least one value is the
    only one used.

    Two layouts are handled:

    * key/value rows - some keyword equals a whole cell in a short row after
      the first one; the value is the cell right after the keyword.
    * header + data row - keywords are matched as substrings of the cells of
      the first row; the value is the cell at the same position in the
      second row.

    Args:
        input_doc: Text of the document containing the table markers.
        table_dict: Mapping of result label -> iterable of keywords to look
            for, tried in order until one of them yields a value.

    Returns:
        dict mapping each label that was found to the extracted cell text.
        For amounts, "万元" is appended when the keyword/header cell carries
        that unit and the value does not. Empty when nothing was found.
    """
    table_result = {}
    table_texts = _TABLE_BLOCK_RE.findall(input_doc)
    try:
        for table_text in table_texts:
            rows = [line.split("|") for line in table_text.split("\n")]
            if _is_key_value_table(rows, table_dict):
                _extract_key_value(
                    rows, table_dict, _has_candidate_columns(rows[0]), table_result
                )
            else:
                _extract_columnar(rows, table_dict, table_result)
            if table_result:
                break
    except Exception:
        # Extraction stays best-effort: unexpected failures (for instance a
        # malformed table_dict) are logged rather than silently discarded,
        # and whatever was gathered so far is returned.
        _logger.exception("get_doc_table: table extraction aborted")
    return table_result